Data Preprocessing


In [2]:
import numpy as np
import pandas as pd
import os

filepath = r'C:\Users\USER\Desktop'
filename01 = 'creditcard.csv'

df_full = pd.read_csv(os.path.join(filepath, filename01))

In [3]:
df_full


Out[3]:
Time V1 V2 V3 V4 V5 V6 V7 V8 V9 ... V21 V22 V23 V24 V25 V26 V27 V28 Amount Class
0 0.0 -1.359807 -0.072781 2.536347 1.378155 -0.338321 0.462388 0.239599 0.098698 0.363787 ... -0.018307 0.277838 -0.110474 0.066928 0.128539 -0.189115 0.133558 -0.021053 149.62 0
1 0.0 1.191857 0.266151 0.166480 0.448154 0.060018 -0.082361 -0.078803 0.085102 -0.255425 ... -0.225775 -0.638672 0.101288 -0.339846 0.167170 0.125895 -0.008983 0.014724 2.69 0
2 1.0 -1.358354 -1.340163 1.773209 0.379780 -0.503198 1.800499 0.791461 0.247676 -1.514654 ... 0.247998 0.771679 0.909412 -0.689281 -0.327642 -0.139097 -0.055353 -0.059752 378.66 0
3 1.0 -0.966272 -0.185226 1.792993 -0.863291 -0.010309 1.247203 0.237609 0.377436 -1.387024 ... -0.108300 0.005274 -0.190321 -1.175575 0.647376 -0.221929 0.062723 0.061458 123.50 0
4 2.0 -1.158233 0.877737 1.548718 0.403034 -0.407193 0.095921 0.592941 -0.270533 0.817739 ... -0.009431 0.798278 -0.137458 0.141267 -0.206010 0.502292 0.219422 0.215153 69.99 0
5 2.0 -0.425966 0.960523 1.141109 -0.168252 0.420987 -0.029728 0.476201 0.260314 -0.568671 ... -0.208254 -0.559825 -0.026398 -0.371427 -0.232794 0.105915 0.253844 0.081080 3.67 0
6 4.0 1.229658 0.141004 0.045371 1.202613 0.191881 0.272708 -0.005159 0.081213 0.464960 ... -0.167716 -0.270710 -0.154104 -0.780055 0.750137 -0.257237 0.034507 0.005168 4.99 0
7 7.0 -0.644269 1.417964 1.074380 -0.492199 0.948934 0.428118 1.120631 -3.807864 0.615375 ... 1.943465 -1.015455 0.057504 -0.649709 -0.415267 -0.051634 -1.206921 -1.085339 40.80 0
8 7.0 -0.894286 0.286157 -0.113192 -0.271526 2.669599 3.721818 0.370145 0.851084 -0.392048 ... -0.073425 -0.268092 -0.204233 1.011592 0.373205 -0.384157 0.011747 0.142404 93.20 0
9 9.0 -0.338262 1.119593 1.044367 -0.222187 0.499361 -0.246761 0.651583 0.069539 -0.736727 ... -0.246914 -0.633753 -0.120794 -0.385050 -0.069733 0.094199 0.246219 0.083076 3.68 0
10 10.0 1.449044 -1.176339 0.913860 -1.375667 -1.971383 -0.629152 -1.423236 0.048456 -1.720408 ... -0.009302 0.313894 0.027740 0.500512 0.251367 -0.129478 0.042850 0.016253 7.80 0
11 10.0 0.384978 0.616109 -0.874300 -0.094019 2.924584 3.317027 0.470455 0.538247 -0.558895 ... 0.049924 0.238422 0.009130 0.996710 -0.767315 -0.492208 0.042472 -0.054337 9.99 0
12 10.0 1.249999 -1.221637 0.383930 -1.234899 -1.485419 -0.753230 -0.689405 -0.227487 -2.094011 ... -0.231809 -0.483285 0.084668 0.392831 0.161135 -0.354990 0.026416 0.042422 121.50 0
13 11.0 1.069374 0.287722 0.828613 2.712520 -0.178398 0.337544 -0.096717 0.115982 -0.221083 ... -0.036876 0.074412 -0.071407 0.104744 0.548265 0.104094 0.021491 0.021293 27.50 0
14 12.0 -2.791855 -0.327771 1.641750 1.767473 -0.136588 0.807596 -0.422911 -1.907107 0.755713 ... 1.151663 0.222182 1.020586 0.028317 -0.232746 -0.235557 -0.164778 -0.030154 58.80 0
15 12.0 -0.752417 0.345485 2.057323 -1.468643 -1.158394 -0.077850 -0.608581 0.003603 -0.436167 ... 0.499625 1.353650 -0.256573 -0.065084 -0.039124 -0.087086 -0.180998 0.129394 15.99 0
16 12.0 1.103215 -0.040296 1.267332 1.289091 -0.735997 0.288069 -0.586057 0.189380 0.782333 ... -0.024612 0.196002 0.013802 0.103758 0.364298 -0.382261 0.092809 0.037051 12.99 0
17 13.0 -0.436905 0.918966 0.924591 -0.727219 0.915679 -0.127867 0.707642 0.087962 -0.665271 ... -0.194796 -0.672638 -0.156858 -0.888386 -0.342413 -0.049027 0.079692 0.131024 0.89 0
18 14.0 -5.401258 -5.450148 1.186305 1.736239 3.049106 -1.763406 -1.559738 0.160842 1.233090 ... -0.503600 0.984460 2.458589 0.042119 -0.481631 -0.621272 0.392053 0.949594 46.80 0
19 15.0 1.492936 -1.029346 0.454795 -1.438026 -1.555434 -0.720961 -1.080664 -0.053127 -1.978682 ... -0.177650 -0.175074 0.040002 0.295814 0.332931 -0.220385 0.022298 0.007602 5.00 0
20 16.0 0.694885 -1.361819 1.029221 0.834159 -1.191209 1.309109 -0.878586 0.445290 -0.446196 ... -0.295583 -0.571955 -0.050881 -0.304215 0.072001 -0.422234 0.086553 0.063499 231.71 0
21 17.0 0.962496 0.328461 -0.171479 2.109204 1.129566 1.696038 0.107712 0.521502 -1.191311 ... 0.143997 0.402492 -0.048508 -1.371866 0.390814 0.199964 0.016371 -0.014605 34.09 0
22 18.0 1.166616 0.502120 -0.067300 2.261569 0.428804 0.089474 0.241147 0.138082 -0.989162 ... 0.018702 -0.061972 -0.103855 -0.370415 0.603200 0.108556 -0.040521 -0.011418 2.28 0
23 18.0 0.247491 0.277666 1.185471 -0.092603 -1.314394 -0.150116 -0.946365 -1.617935 1.544071 ... 1.650180 0.200454 -0.185353 0.423073 0.820591 -0.227632 0.336634 0.250475 22.75 0
24 22.0 -1.946525 -0.044901 -0.405570 -1.013057 2.941968 2.955053 -0.063063 0.855546 0.049967 ... -0.579526 -0.799229 0.870300 0.983421 0.321201 0.149650 0.707519 0.014600 0.89 0
25 22.0 -2.074295 -0.121482 1.322021 0.410008 0.295198 -0.959537 0.543985 -0.104627 0.475664 ... -0.403639 -0.227404 0.742435 0.398535 0.249212 0.274404 0.359969 0.243232 26.43 0
26 23.0 1.173285 0.353498 0.283905 1.133563 -0.172577 -0.916054 0.369025 -0.327260 -0.246651 ... 0.067003 0.227812 -0.150487 0.435045 0.724825 -0.337082 0.016368 0.030041 41.88 0
27 23.0 1.322707 -0.174041 0.434555 0.576038 -0.836758 -0.831083 -0.264905 -0.220982 -1.071425 ... -0.284376 -0.323357 -0.037710 0.347151 0.559639 -0.280158 0.042335 0.028822 16.00 0
28 23.0 -0.414289 0.905437 1.727453 1.473471 0.007443 -0.200331 0.740228 -0.029247 -0.593392 ... 0.077237 0.457331 -0.038500 0.642522 -0.183891 -0.277464 0.182687 0.152665 33.00 0
29 23.0 1.059387 -0.175319 1.266130 1.186110 -0.786002 0.578435 -0.767084 0.401046 0.699500 ... 0.013676 0.213734 0.014462 0.002951 0.294638 -0.395070 0.081461 0.024220 12.99 0
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
284777 172764.0 2.079137 -0.028723 -1.343392 0.358000 -0.045791 -1.345452 0.227476 -0.378355 0.665911 ... 0.235758 0.829758 -0.002063 0.001344 0.262183 -0.105327 -0.022363 -0.060283 1.00 0
284778 172764.0 -0.764523 0.588379 -0.907599 -0.418847 0.901528 -0.760802 0.758545 0.414698 -0.730854 ... 0.003530 -0.431876 0.141759 0.587119 -0.200998 0.267337 -0.152951 -0.065285 80.00 0
284779 172766.0 1.975178 -0.616244 -2.628295 -0.406246 2.327804 3.664740 -0.533297 0.842937 1.128798 ... 0.086043 0.543613 -0.032129 0.768379 0.477688 -0.031833 0.014151 -0.066542 25.00 0
284780 172766.0 -1.727503 1.108356 2.219561 1.148583 -0.884199 0.793083 -0.527298 0.866429 0.853819 ... -0.094708 0.236818 -0.204280 1.158185 0.627801 -0.399981 0.510818 0.233265 30.00 0
284781 172766.0 -1.139015 -0.155510 1.894478 -1.138957 1.451777 0.093598 0.191353 0.092211 -0.062621 ... -0.191027 -0.631658 -0.147249 0.212931 0.354257 -0.241068 -0.161717 -0.149188 13.00 0
284782 172767.0 -0.268061 2.540315 -1.400915 4.846661 0.639105 0.186479 -0.045911 0.936448 -2.419986 ... -0.263889 -0.857904 0.235172 -0.681794 -0.668894 0.044657 -0.066751 -0.072447 12.82 0
284783 172768.0 -1.796092 1.929178 -2.828417 -1.689844 2.199572 3.123732 -0.270714 1.657495 0.465804 ... 0.271170 1.145750 0.084783 0.721269 -0.529906 -0.240117 0.129126 -0.080620 11.46 0
284784 172768.0 -0.669662 0.923769 -1.543167 -1.560729 2.833960 3.240843 0.181576 1.282746 -0.893890 ... 0.183856 0.202670 -0.373023 0.651122 1.073823 0.844590 -0.286676 -0.187719 40.00 0
284785 172768.0 0.032887 0.545338 -1.185844 -1.729828 2.932315 3.401529 0.337434 0.925377 -0.165663 ... -0.266113 -0.716336 0.108519 0.688519 -0.460220 0.161939 0.265368 0.090245 1.79 0
284786 172768.0 -2.076175 2.142238 -2.522704 -1.888063 1.982785 3.732950 -1.217430 -0.536644 0.272867 ... 2.016666 -1.588269 0.588482 0.632444 -0.201064 0.199251 0.438657 0.172923 8.95 0
284787 172769.0 -1.029719 -1.110670 -0.636179 -0.840816 2.424360 -2.956733 0.283610 -0.332656 -0.247488 ... 0.353722 0.488487 0.293632 0.107812 -0.935586 1.138216 0.025271 0.255347 9.99 0
284788 172770.0 2.007418 -0.280235 -0.208113 0.335261 -0.715798 -0.751373 -0.458972 -0.140140 0.959971 ... -0.208260 -0.430347 0.416765 0.064819 -0.608337 0.268436 -0.028069 -0.041367 3.99 0
284789 172770.0 -0.446951 1.302212 -0.168583 0.981577 0.578957 -0.605641 1.253430 -1.042610 -0.417116 ... 0.851800 0.305268 -0.148093 -0.038712 0.010209 -0.362666 0.503092 0.229921 60.50 0
284790 172771.0 -0.515513 0.971950 -1.014580 -0.677037 0.912430 -0.316187 0.396137 0.532364 -0.224606 ... -0.280302 -0.849919 0.300245 0.000607 -0.376379 0.128660 -0.015205 -0.021486 9.81 0
284791 172774.0 -0.863506 0.874701 0.420358 -0.530365 0.356561 -1.046238 0.757051 0.230473 -0.506856 ... -0.108846 -0.480820 -0.074513 -0.003988 -0.113149 0.280378 -0.077310 0.023079 20.32 0
284792 172774.0 -0.724123 1.485216 -1.132218 -0.607190 0.709499 -0.482638 0.548393 0.343003 -0.226323 ... 0.414621 1.307511 -0.059545 0.242669 -0.665424 -0.269869 -0.170579 -0.030692 3.99 0
284793 172775.0 1.971002 -0.699067 -1.697541 -0.617643 1.718797 3.911336 -1.259306 1.056209 1.315006 ... 0.188758 0.694418 0.163002 0.726365 -0.058282 -0.191813 0.061858 -0.043716 4.99 0
284794 172777.0 -1.266580 -0.400461 0.956221 -0.723919 1.531993 -1.788600 0.314741 0.004704 0.013857 ... -0.157831 -0.883365 0.088485 -0.076790 -0.095833 0.132720 -0.028468 0.126494 0.89 0
284795 172778.0 -12.516732 10.187818 -8.476671 -2.510473 -4.586669 -1.394465 -3.632516 5.498583 4.893089 ... -0.944759 -1.565026 0.890675 -1.253276 1.786717 0.320763 2.090712 1.232864 9.87 0
284796 172780.0 1.884849 -0.143540 -0.999943 1.506772 -0.035300 -0.613638 0.190241 -0.249058 0.666458 ... 0.144008 0.634646 -0.042114 -0.053206 0.316403 -0.461441 0.018265 -0.041068 60.00 0
284797 172782.0 -0.241923 0.712247 0.399806 -0.463406 0.244531 -1.343668 0.929369 -0.206210 0.106234 ... -0.228876 -0.514376 0.279598 0.371441 -0.559238 0.113144 0.131507 0.081265 5.49 0
284798 172782.0 0.219529 0.881246 -0.635891 0.960928 -0.152971 -1.014307 0.427126 0.121340 -0.285670 ... 0.099936 0.337120 0.251791 0.057688 -1.508368 0.144023 0.181205 0.215243 24.05 0
284799 172783.0 -1.775135 -0.004235 1.189786 0.331096 1.196063 5.519980 -1.518185 2.080825 1.159498 ... 0.103302 0.654850 -0.348929 0.745323 0.704545 -0.127579 0.454379 0.130308 79.99 0
284800 172784.0 2.039560 -0.175233 -1.196825 0.234580 -0.008713 -0.726571 0.017050 -0.118228 0.435402 ... -0.268048 -0.717211 0.297930 -0.359769 -0.315610 0.201114 -0.080826 -0.075071 2.68 0
284801 172785.0 0.120316 0.931005 -0.546012 -0.745097 1.130314 -0.235973 0.812722 0.115093 -0.204064 ... -0.314205 -0.808520 0.050343 0.102800 -0.435870 0.124079 0.217940 0.068803 2.69 0
284802 172786.0 -11.881118 10.071785 -9.834783 -2.066656 -5.364473 -2.606837 -4.918215 7.305334 1.914428 ... 0.213454 0.111864 1.014480 -0.509348 1.436807 0.250034 0.943651 0.823731 0.77 0
284803 172787.0 -0.732789 -0.055080 2.035030 -0.738589 0.868229 1.058415 0.024330 0.294869 0.584800 ... 0.214205 0.924384 0.012463 -1.016226 -0.606624 -0.395255 0.068472 -0.053527 24.79 0
284804 172788.0 1.919565 -0.301254 -3.249640 -0.557828 2.630515 3.031260 -0.296827 0.708417 0.432454 ... 0.232045 0.578229 -0.037501 0.640134 0.265745 -0.087371 0.004455 -0.026561 67.88 0
284805 172788.0 -0.240440 0.530483 0.702510 0.689799 -0.377961 0.623708 -0.686180 0.679145 0.392087 ... 0.265245 0.800049 -0.163298 0.123205 -0.569159 0.546668 0.108821 0.104533 10.00 0
284806 172792.0 -0.533413 -0.189733 0.703337 -0.506271 -0.012546 -0.649617 1.577006 -0.414650 0.486180 ... 0.261057 0.643078 0.376777 0.008797 -0.473649 -0.818267 -0.002415 0.013649 217.00 0

284807 rows × 31 columns


In [4]:
df_full.info()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 284807 entries, 0 to 284806
Data columns (total 31 columns):
Time      284807 non-null float64
V1        284807 non-null float64
V2        284807 non-null float64
V3        284807 non-null float64
V4        284807 non-null float64
V5        284807 non-null float64
V6        284807 non-null float64
V7        284807 non-null float64
V8        284807 non-null float64
V9        284807 non-null float64
V10       284807 non-null float64
V11       284807 non-null float64
V12       284807 non-null float64
V13       284807 non-null float64
V14       284807 non-null float64
V15       284807 non-null float64
V16       284807 non-null float64
V17       284807 non-null float64
V18       284807 non-null float64
V19       284807 non-null float64
V20       284807 non-null float64
V21       284807 non-null float64
V22       284807 non-null float64
V23       284807 non-null float64
V24       284807 non-null float64
V25       284807 non-null float64
V26       284807 non-null float64
V27       284807 non-null float64
V28       284807 non-null float64
Amount    284807 non-null float64
Class     284807 non-null int64
dtypes: float64(30), int64(1)
memory usage: 67.4 MB

In [5]:
df_full[df_full.Class == 1]


Out[5]:
Time V1 V2 V3 V4 V5 V6 V7 V8 V9 ... V21 V22 V23 V24 V25 V26 V27 V28 Amount Class
541 406.0 -2.312227 1.951992 -1.609851 3.997906 -0.522188 -1.426545 -2.537387 1.391657 -2.770089 ... 0.517232 -0.035049 -0.465211 0.320198 0.044519 0.177840 0.261145 -0.143276 0.00 1
623 472.0 -3.043541 -3.157307 1.088463 2.288644 1.359805 -1.064823 0.325574 -0.067794 -0.270953 ... 0.661696 0.435477 1.375966 -0.293803 0.279798 -0.145362 -0.252773 0.035764 529.00 1
4920 4462.0 -2.303350 1.759247 -0.359745 2.330243 -0.821628 -0.075788 0.562320 -0.399147 -0.238253 ... -0.294166 -0.932391 0.172726 -0.087330 -0.156114 -0.542628 0.039566 -0.153029 239.93 1
6108 6986.0 -4.397974 1.358367 -2.592844 2.679787 -1.128131 -1.706536 -3.496197 -0.248778 -0.247768 ... 0.573574 0.176968 -0.436207 -0.053502 0.252405 -0.657488 -0.827136 0.849573 59.00 1
6329 7519.0 1.234235 3.019740 -4.304597 4.732795 3.624201 -1.357746 1.713445 -0.496358 -1.282858 ... -0.379068 -0.704181 -0.656805 -1.632653 1.488901 0.566797 -0.010016 0.146793 1.00 1
6331 7526.0 0.008430 4.137837 -6.240697 6.675732 0.768307 -3.353060 -1.631735 0.154612 -2.795892 ... 0.364514 -0.608057 -0.539528 0.128940 1.488481 0.507963 0.735822 0.513574 1.00 1
6334 7535.0 0.026779 4.132464 -6.560600 6.348557 1.329666 -2.513479 -1.689102 0.303253 -3.139409 ... 0.370509 -0.576752 -0.669605 -0.759908 1.605056 0.540675 0.737040 0.496699 1.00 1
6336 7543.0 0.329594 3.712889 -5.775935 6.078266 1.667359 -2.420168 -0.812891 0.133080 -2.214311 ... 0.156617 -0.652450 -0.551572 -0.716522 1.415717 0.555265 0.530507 0.404474 1.00 1
6338 7551.0 0.316459 3.809076 -5.615159 6.047445 1.554026 -2.651353 -0.746579 0.055586 -2.678679 ... 0.208828 -0.511747 -0.583813 -0.219845 1.474753 0.491192 0.518868 0.402528 1.00 1
6427 7610.0 0.725646 2.300894 -5.329976 4.007683 -1.730411 -1.732193 -3.968593 1.063728 -0.486097 ... 0.589669 0.109541 0.601045 -0.364700 -1.843078 0.351909 0.594550 0.099372 1.00 1
6446 7672.0 0.702710 2.426433 -5.234513 4.416661 -2.170806 -2.667554 -3.878088 0.911337 -0.166199 ... 0.551180 -0.009802 0.721698 0.473246 -1.959304 0.319476 0.600485 0.129305 1.00 1
6472 7740.0 1.023874 2.001485 -4.769752 3.819195 -1.271754 -1.734662 -3.059245 0.889805 0.415382 ... 0.343283 -0.054196 0.709654 -0.372216 -2.032068 0.366778 0.395171 0.020206 1.00 1
6529 7891.0 -1.585505 3.261585 -4.137422 2.357096 -1.405043 -1.879437 -3.513687 1.515607 -1.207166 ... 0.501543 -0.546869 -0.076584 -0.425550 0.123644 0.321985 0.264028 0.132817 1.00 1
6609 8090.0 -1.783229 3.402794 -3.822742 2.625368 -1.976415 -2.731689 -3.430559 1.413204 -0.776941 ... 0.454032 -0.577526 0.045967 0.461700 0.044146 0.305704 0.530981 0.243746 1.00 1
6641 8169.0 0.857321 4.093912 -7.423894 7.380245 0.973366 -2.730762 -1.496497 0.543015 -2.351190 ... 0.375026 0.145400 0.240603 -0.234649 -1.004881 0.435832 0.618324 0.148469 1.00 1
6717 8408.0 -1.813280 4.917851 -5.926130 5.701500 1.204393 -3.035138 -1.713402 0.561257 -3.796354 ... 0.615642 -0.406427 -0.737018 -0.279642 1.106766 0.323885 0.894767 0.569519 1.00 1
6719 8415.0 -0.251471 4.313523 -6.891438 6.796797 0.616297 -2.966327 -2.436653 0.489328 -3.371639 ... 0.536892 -0.546126 -0.605240 -0.263743 1.539916 0.523574 0.891025 0.572741 1.00 1
6734 8451.0 0.314597 2.660670 -5.920037 4.522500 -2.315027 -2.278352 -4.684054 1.202270 -0.694696 ... 0.743314 0.064038 0.677842 0.083008 -1.911034 0.322188 0.620867 0.185030 1.00 1
6774 8528.0 0.447396 2.481954 -5.660814 4.455923 -2.443780 -2.185040 -4.716143 1.249803 -0.718326 ... 0.756053 0.140168 0.665411 0.131464 -1.908217 0.334808 0.748534 0.175414 1.00 1
6820 8614.0 -2.169929 3.639654 -4.508498 2.730668 -2.122693 -2.341017 -4.235253 1.703538 -1.305279 ... 0.645103 -0.503529 -0.000523 0.071696 0.092007 0.308498 0.552591 0.298954 1.00 1
6870 8757.0 -1.863756 3.442644 -4.468260 2.805336 -2.118412 -2.332285 -4.261237 1.701682 -1.439396 ... 0.667927 -0.516242 -0.012218 0.070614 0.058504 0.304883 0.418012 0.208858 1.00 1
6882 8808.0 -4.617217 1.695694 -3.114372 4.328199 -1.873257 -0.989908 -4.577265 0.472216 0.472017 ... 0.481830 0.146023 0.117039 -0.217565 -0.138776 -0.424453 -1.002041 0.890780 1.10 1
6899 8878.0 -2.661802 5.856393 -7.653616 6.379742 -0.060712 -3.131550 -3.103570 1.778492 -3.831154 ... 0.734775 -0.435901 -0.384766 -0.286016 1.007934 0.413196 0.280284 0.303937 1.00 1
6903 8886.0 -2.535852 5.793644 -7.618463 6.395830 -0.065210 -3.136372 -3.104557 1.823233 -3.878658 ... 0.716720 -0.448060 -0.402407 -0.288835 1.011752 0.425965 0.413140 0.308205 1.00 1
6971 9064.0 -3.499108 0.258555 -4.489558 4.853894 -6.974522 3.628382 5.431271 -1.946734 -0.775680 ... -1.052368 0.204817 -2.119007 0.170279 -0.393844 0.296367 1.985913 -0.900452 1809.68 1
8296 11080.0 -2.125490 5.973556 -11.034727 9.007147 -1.689451 -2.854415 -7.810441 2.030870 -5.902828 ... 1.646518 -0.278485 -0.664841 -1.164555 1.701796 0.690806 2.119749 1.108933 1.00 1
8312 11092.0 0.378275 3.914797 -5.726872 6.094141 1.698875 -2.807314 -0.591118 -0.123496 -2.530713 ... 0.149896 -0.601967 -0.613724 -0.403114 1.568445 0.521884 0.527938 0.411910 1.00 1
8335 11131.0 -1.426623 4.141986 -9.804103 6.666273 -4.749527 -2.073129 -10.089931 2.791345 -3.249516 ... 1.865679 0.407809 0.605809 -0.769348 -1.746337 0.502040 1.977258 0.711607 1.00 1
8615 11629.0 -3.891192 7.098916 -11.426467 8.607557 -2.065706 -2.985288 -8.138589 2.973928 -6.272790 ... 1.757085 -0.189709 -0.508629 -1.189308 1.188536 0.605242 1.881529 0.875260 1.00 1
8617 11635.0 0.919137 4.199633 -7.535607 7.426940 1.118215 -2.886722 -1.341036 0.363933 -2.203224 ... 0.316094 0.055179 0.210692 -0.417918 -0.911188 0.466524 0.627393 0.157851 1.00 1
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
251891 155548.0 1.878230 1.325630 -2.333469 4.233151 1.355184 -0.853508 0.716025 -0.164910 -1.502345 ... -0.152131 -0.360736 0.043414 -0.242380 0.171098 -0.002601 -0.026667 0.005238 7.06 1
251904 155554.0 -1.040067 3.106703 -5.409027 3.109903 -0.887237 -2.497522 -2.073347 0.639818 -3.013331 ... 0.773961 0.214868 -0.184233 -0.284091 0.493467 0.732329 0.675067 0.337076 94.82 1
252124 155662.0 -1.928613 4.601506 -7.124053 5.716088 1.026579 -3.189073 -2.261897 1.185096 -4.441942 ... 0.602291 -0.541287 -0.354639 -0.701492 -0.030973 0.034070 0.573393 0.294686 0.77 1
252774 155965.0 -1.201398 4.864535 -8.328823 7.652399 -0.167445 -2.767695 -3.176421 1.623279 -4.367228 ... 0.532320 -0.556913 0.192444 -0.698588 0.025003 0.514968 0.378105 -0.053133 0.77 1
254344 156685.0 -0.129778 0.141547 -0.894702 -0.457662 0.810608 -0.504723 1.373588 -0.209476 0.208494 ... -0.032643 -0.246526 0.484108 0.359637 -0.435972 -0.248480 0.021527 0.109192 187.11 1
254395 156710.0 0.202402 1.176270 0.346379 2.882138 1.407133 -0.504355 1.438537 -0.395603 -1.555142 ... 0.242560 0.841230 -0.370157 -0.026012 0.491954 0.234576 -0.279788 -0.331933 7.59 1
255403 157207.0 1.170756 2.501038 -4.986159 5.374160 0.997798 -1.259004 -1.237689 0.358426 -2.612489 ... 0.123145 -0.713201 -0.080868 -0.964310 0.338568 0.068630 0.481588 0.268226 4.97 1
255556 157284.0 -0.242245 4.147186 -5.672349 6.493741 1.591168 -1.602523 -0.950463 0.722903 -4.128505 ... 0.249023 -0.480286 -0.286080 -1.153575 -0.035571 0.559628 0.409446 0.221048 0.77 1
258403 158638.0 -5.976119 -7.196980 -5.388316 5.104799 4.676533 -5.566870 -4.291180 0.876531 -1.075478 ... 1.459369 -0.136262 0.848177 -0.269916 -1.095060 -0.710905 0.565846 -1.034107 296.00 1
261056 159844.0 -0.408111 3.132944 -3.098030 5.803893 0.890609 -0.501474 -0.440054 0.591828 -3.267693 ... 0.098482 -0.538375 -0.217989 -1.042657 0.314389 0.543244 0.233851 0.119603 45.51 1
261473 160034.0 -2.349340 1.512604 -2.647497 1.753792 0.406328 -2.188494 -0.686935 -0.547984 -0.099528 ... -0.088519 -0.595178 0.258148 0.061901 -0.354180 -1.152671 -0.736073 0.733703 4.90 1
261925 160243.0 -2.783865 1.596824 -2.084844 2.512986 -1.446749 -0.828496 -0.732262 -0.203329 -0.347046 ... 0.203563 0.293268 0.199568 0.146868 0.163602 -0.624085 -1.333100 0.428634 156.00 1
262560 160537.0 0.567539 3.309385 -6.631268 6.394574 -0.054172 -2.396535 -2.792489 0.514811 -3.541780 ... 0.614221 -0.365047 -0.180409 -0.523271 0.645054 0.246466 0.902675 0.473571 4.69 1
262826 160665.0 -0.417340 4.700055 -7.521767 7.671884 0.260821 -2.646693 -2.854432 0.958783 -4.588536 ... 0.622200 -0.437708 -0.090358 -0.742802 -0.312361 0.502575 0.821390 0.372379 0.77 1
263080 160791.0 2.132386 0.705608 -3.530759 0.514779 1.527175 -1.716268 1.132791 -0.574214 0.128904 ... 0.163739 0.703910 -0.245076 0.460049 0.920281 -0.216586 -0.026219 -0.025001 1.00 1
263274 160870.0 -0.644278 5.002352 -8.252739 7.756915 -0.216267 -2.751496 -3.358857 1.406268 -4.403852 ... 0.587728 -0.605759 0.033746 -0.756170 -0.008172 0.532772 0.663970 0.192067 0.77 1
263324 160895.0 -0.848290 2.719882 -6.199070 3.044437 -3.301910 -1.992117 -3.734902 1.520079 -2.548788 ... 1.125229 0.805258 0.199119 0.035206 0.012159 0.601658 0.137468 -0.171397 127.14 1
263877 161154.0 -3.387601 3.977881 -6.978585 1.657766 -1.100500 -3.599487 -3.686651 1.942252 -3.065089 ... 1.043587 0.262189 -0.479224 -0.326638 -0.156939 0.113807 0.354124 0.287592 0.38 1
268375 163181.0 -5.238808 0.623013 -5.784507 1.678889 -0.364432 -0.477295 -4.276132 -0.695173 -2.971644 ... -0.326140 1.509239 -0.215966 -0.245727 0.893041 0.865758 0.854657 -0.964482 39.98 1
272521 165132.0 -7.503926 -0.360628 -3.830952 2.486103 2.497367 1.332437 -6.783964 -15.415385 0.465512 ... -6.389132 2.249964 1.670508 0.140450 0.162147 1.207731 1.268958 0.097538 12.31 1
274382 165981.0 -5.766879 -8.402154 0.056543 6.950983 9.880564 -5.773192 -5.748879 0.721743 -1.076274 ... 0.880395 -0.130436 2.241471 0.665346 -1.890041 -0.120803 0.073269 0.583799 0.00 1
274475 166028.0 -0.956390 2.361594 -3.171195 1.970759 0.474761 -1.902598 -0.055178 0.277831 -1.745854 ... 0.473211 0.719400 0.122458 -0.255650 -0.619259 -0.484280 0.683535 0.443299 39.90 1
275992 166831.0 -2.027135 -1.131890 -1.135194 1.086963 -0.010547 0.423797 3.790880 -1.155595 -0.063434 ... -0.315105 0.575520 0.490842 0.756502 -0.142685 -0.602777 0.508712 -0.091646 634.30 1
276071 166883.0 2.091900 -0.757459 -1.192258 -0.755458 -0.620324 -0.322077 -1.082511 0.117200 -0.140927 ... 0.288253 0.831939 0.142007 0.592615 -0.196143 -0.136676 0.020182 -0.015470 19.95 1
276864 167338.0 -1.374424 2.793185 -4.346572 2.400731 -1.688433 0.111136 -0.922038 -2.149930 -2.027474 ... -0.870779 0.504849 0.137994 0.368275 0.103137 -0.414209 0.454982 0.096711 349.08 1
279863 169142.0 -1.927883 1.125653 -4.518331 1.749293 -1.566487 -2.010494 -0.882850 0.697211 -2.064945 ... 0.778584 -0.319189 0.639419 -0.294885 0.537503 0.788395 0.292680 0.147968 390.00 1
280143 169347.0 1.378559 1.289381 -5.004247 1.411850 0.442581 -1.326536 -1.413170 0.248525 -1.127396 ... 0.370612 0.028234 -0.145640 -0.081049 0.521875 0.739467 0.389152 0.186637 0.76 1
280149 169351.0 -0.676143 1.126366 -2.213700 0.468308 -1.120541 -0.003346 -2.234739 1.210158 -0.652250 ... 0.751826 0.834108 0.190944 0.032070 -0.739695 0.471111 0.385107 0.194361 77.89 1
281144 169966.0 -3.113832 0.585864 -5.399730 1.817092 -0.840618 -2.943548 -2.208002 1.058733 -1.632333 ... 0.583276 -0.269209 -0.456108 -0.183659 -0.328168 0.606116 0.884876 -0.253700 245.00 1
281674 170348.0 1.991976 0.158476 -2.583441 0.408670 1.151147 -0.096695 0.223050 -0.068384 0.577829 ... -0.164350 -0.295135 -0.072173 -0.450261 0.313267 -0.289617 0.002988 -0.015309 42.53 1

492 rows × 31 columns

Shuffle


In [6]:
from sklearn.utils import shuffle

df_full.drop('Time', axis=1, inplace=True)
shuffle_df = shuffle(df_full, random_state=42)

# roughly 80/20 split: 284807 rows -> 227845 for training, 56962 for testing
num_total_cases = len(df_full)
df_train = shuffle_df[0:227845]
df_test = shuffle_df[227845:]
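
Note that this is a plain random shuffle and slice. Because fraud cases make up only about 0.17% of the data, a stratified split would keep that ratio identical in both sets. Below is a minimal sketch using scikit-learn's train_test_split; the test_size of 0.2 and the use of the Class column for stratification are illustrative assumptions, not part of the original notebook.

from sklearn.model_selection import train_test_split

# Stratified 80/20 split that preserves the fraud ratio in both subsets
df_train, df_test = train_test_split(shuffle_df, test_size=0.2,
                                     random_state=42,
                                     stratify=shuffle_df['Class'])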

In [7]:
# Columns 0-28 are the 29 features (V1-V28 and Amount); the last column is Class
train_feature = np.array(df_train.values[:, 0:29])
train_label = np.array(df_train.values[:, -1])
test_feature = np.array(df_test.values[:, 0:29])
test_label = np.array(df_test.values[:, -1])

In [8]:
train_feature


Out[8]:
array([[ -1.65265066e+01,   8.58497180e+00,  -1.86498532e+01, ...,
         -2.01857525e+00,  -1.04280417e+00,   3.64190000e+02],
       [  3.39812064e-01,  -2.74374524e+00,  -1.34069511e-01, ...,
          4.09958027e-02,   1.02037825e-01,   5.20120000e+02],
       [  1.39959027e+00,  -5.90701288e-01,   1.68618940e-01, ...,
          1.14086454e-02,   4.63414166e-03,   3.10000000e+01],
       ..., 
       [ -1.63679912e+00,  -2.88001171e-01,   3.04408460e+00, ...,
         -2.19883189e-02,   1.43121533e-01,   7.81300000e+01],
       [  1.23933977e+00,  -1.52563018e-01,  -1.03219327e-01, ...,
         -9.22603427e-02,  -2.78862601e-02,   2.00000000e+00],
       [  3.39286374e-02,   6.62453224e-01,  -1.99422851e+00, ...,
         -6.26921714e-01,  -2.67546624e-01,   7.60000000e-01]])

In [9]:
train_feature.shape


Out[9]:
(227845, 29)

In [10]:
train_label


Out[10]:
array([ 1.,  0.,  0., ...,  0.,  0.,  0.])

In [11]:
train_label.shape


Out[11]:
(227845,)

In [12]:
test_feature.shape


Out[12]:
(56962, 29)

In [13]:
test_label.shape


Out[13]:
(56962,)

MinMaxScaler


In [14]:
from sklearn.preprocessing import MinMaxScaler

scaler = MinMaxScaler()

# Fit the scaler on the training data only, then reuse it for the test data;
# refitting on the test set would scale the two sets inconsistently and leak
# test-set statistics into preprocessing
scaler.fit(train_feature)
train_feature_trans = scaler.transform(train_feature)
test_feature_trans = scaler.transform(test_feature)

In [15]:
train_feature_trans


Out[15]:
array([[  6.15096846e-01,   8.42768889e-01,   3.49045387e-01, ...,
          5.91827193e-01,   3.10742520e-01,   1.92591222e-02],
       [  9.57164719e-01,   7.04498838e-01,   7.79009635e-01, ...,
          6.51149915e-01,   3.44089730e-01,   2.75050238e-02],
       [  9.78658211e-01,   7.30777321e-01,   7.86038515e-01, ...,
          6.50297703e-01,   3.41252534e-01,   1.63934426e-03],
       ..., 
       [  9.17076826e-01,   7.34471857e-01,   8.52811137e-01, ...,
          6.49335756e-01,   3.45286425e-01,   4.13167636e-03],
       [  9.75408151e-01,   7.36124917e-01,   7.79726023e-01, ...,
          6.47311680e-01,   3.40305273e-01,   1.05764146e-04],
       [  9.50961060e-01,   7.46072410e-01,   7.35813956e-01, ...,
          6.31911596e-01,   3.33324393e-01,   4.01903755e-05]])

Model Training


In [28]:
######################### Build the model
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Dropout

import matplotlib.pyplot as plt

def show_train_history(train_history, train, validation):
    plt.plot(train_history.history[train])
    plt.plot(train_history.history[validation])
    plt.title('Train History')
    plt.ylabel(train)
    plt.xlabel('Epoch')
    plt.legend(['train', 'validation'], loc='best')
    plt.show()

model = Sequential()  # layers are stacked one after another, in order

# Input layer plus hidden layer 1
model.add(Dense(units=200,
                input_dim=29,
                kernel_initializer='uniform',
                activation='relu'))
model.add(Dropout(0.5))

# Hidden layer 2; no input_dim needed, it is inferred from the previous layer's units
model.add(Dense(units=200,
                kernel_initializer='uniform',
                activation='relu'))
model.add(Dropout(0.5))

# Output layer
model.add(Dense(units=1,  # a single output value
                kernel_initializer='uniform',
                activation='sigmoid'))

print(model.summary())  # shows the architecture and the parameter counts

model.compile(loss='binary_crossentropy',   # binary_crossentropy for a binary target
              optimizer='adam', metrics=['accuracy'])

train_history = model.fit(x=train_feature_trans, y=train_label,  # Keras splits off the validation set internally
                          validation_split=0.5, epochs=20,
                          batch_size=50000, verbose=2)  # verbose=2 prints one line per epoch

######################### Visualize the training history
show_train_history(train_history, 'acc', 'val_acc')
show_train_history(train_history, 'loss', 'val_loss')


######################### Evaluate on the test set
scores = model.evaluate(test_feature_trans, test_label)
print('\n')
print('accuracy=', scores[1])

######################### Record the model's predictions (the answer sheet)
prediction = model.predict_classes(test_feature_trans)

# Save the trained weights
#model.save_weights("Keras_CreditCardFraud_MLP.h5")
#print('model saved to disk')


_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
dense_4 (Dense)              (None, 200)               6000      
_________________________________________________________________
dropout_3 (Dropout)          (None, 200)               0         
_________________________________________________________________
dense_5 (Dense)              (None, 200)               40200     
_________________________________________________________________
dropout_4 (Dropout)          (None, 200)               0         
_________________________________________________________________
dense_6 (Dense)              (None, 1)                 201       
=================================================================
Total params: 46,401
Trainable params: 46,401
Non-trainable params: 0
_________________________________________________________________
None
Train on 113922 samples, validate on 113923 samples
Epoch 1/20
0s - loss: 0.6872 - acc: 0.6792 - val_loss: 0.6523 - val_acc: 0.9983
Epoch 2/20
0s - loss: 0.6385 - acc: 0.9983 - val_loss: 0.5823 - val_acc: 0.9983
Epoch 3/20
0s - loss: 0.5604 - acc: 0.9983 - val_loss: 0.4748 - val_acc: 0.9983
Epoch 4/20
0s - loss: 0.4454 - acc: 0.9983 - val_loss: 0.3356 - val_acc: 0.9983
Epoch 5/20
0s - loss: 0.3044 - acc: 0.9983 - val_loss: 0.1934 - val_acc: 0.9983
Epoch 6/20
0s - loss: 0.1703 - acc: 0.9983 - val_loss: 0.0882 - val_acc: 0.9983
Epoch 7/20
0s - loss: 0.0779 - acc: 0.9983 - val_loss: 0.0355 - val_acc: 0.9983
Epoch 8/20
0s - loss: 0.0333 - acc: 0.9983 - val_loss: 0.0171 - val_acc: 0.9983
Epoch 9/20
0s - loss: 0.0174 - acc: 0.9983 - val_loss: 0.0125 - val_acc: 0.9983
Epoch 10/20
0s - loss: 0.0130 - acc: 0.9983 - val_loss: 0.0121 - val_acc: 0.9983
Epoch 11/20
0s - loss: 0.0124 - acc: 0.9983 - val_loss: 0.0127 - val_acc: 0.9983
Epoch 12/20
0s - loss: 0.0127 - acc: 0.9983 - val_loss: 0.0134 - val_acc: 0.9983
Epoch 13/20
0s - loss: 0.0131 - acc: 0.9983 - val_loss: 0.0140 - val_acc: 0.9983
Epoch 14/20
0s - loss: 0.0139 - acc: 0.9983 - val_loss: 0.0144 - val_acc: 0.9983
Epoch 15/20
0s - loss: 0.0141 - acc: 0.9983 - val_loss: 0.0146 - val_acc: 0.9983
Epoch 16/20
0s - loss: 0.0138 - acc: 0.9983 - val_loss: 0.0146 - val_acc: 0.9983
Epoch 17/20
0s - loss: 0.0145 - acc: 0.9983 - val_loss: 0.0145 - val_acc: 0.9983
Epoch 18/20
0s - loss: 0.0140 - acc: 0.9983 - val_loss: 0.0143 - val_acc: 0.9983
Epoch 19/20
0s - loss: 0.0136 - acc: 0.9983 - val_loss: 0.0140 - val_acc: 0.9983
Epoch 20/20
0s - loss: 0.0136 - acc: 0.9983 - val_loss: 0.0136 - val_acc: 0.9983
56544/56962 [============================>.] - ETA: 0s

accuracy= 0.998156665847
56544/56962 [============================>.] - ETA: 0s

Train/Test Score


In [29]:
df_ans = pd.DataFrame({'Real Class': test_label})
df_ans['Prediction'] = prediction

In [30]:
df_ans[ df_ans['Real Class'] != df_ans['Prediction'] ]


Out[30]:
Real Class Prediction
1420 1.0 0
1572 1.0 0
1589 1.0 0
2112 1.0 0
3770 1.0 0
3796 1.0 0
4574 1.0 0
4810 1.0 0
5647 1.0 0
5849 1.0 0
5905 1.0 0
7283 1.0 0
7890 1.0 0
8577 1.0 0
8728 1.0 0
9045 1.0 0
9375 1.0 0
10094 1.0 0
10340 1.0 0
10456 1.0 0
11671 1.0 0
12122 1.0 0
12329 1.0 0
12383 1.0 0
12680 1.0 0
13780 1.0 0
15093 1.0 0
15111 1.0 0
15425 1.0 0
15497 1.0 0
... ... ...
40127 1.0 0
40189 1.0 0
40443 1.0 0
41980 1.0 0
42148 1.0 0
43621 1.0 0
43845 1.0 0
44510 1.0 0
45199 1.0 0
45222 1.0 0
45242 1.0 0
45279 1.0 0
45447 1.0 0
46987 1.0 0
47010 1.0 0
47158 1.0 0
47412 1.0 0
47689 1.0 0
49324 1.0 0
49794 1.0 0
49926 1.0 0
50133 1.0 0
50761 1.0 0
51688 1.0 0
51822 1.0 0
52246 1.0 0
53703 1.0 0
55933 1.0 0
56465 1.0 0
56767 1.0 0

105 rows × 2 columns


In [31]:
df_ans['Prediction'].value_counts()  # not a single 1 was predicted, which is terrible


Out[31]:
0    56962
Name: Prediction, dtype: int64

In [32]:
df_ans['Real Class'].value_counts()


Out[32]:
0.0    56857
1.0      105
Name: Real Class, dtype: int64

In [33]:
prediction_train = model.predict_classes(train_feature_trans)  # predict on the scaled training features, as used for fitting


226496/227845 [============================>.] - ETA: 0s

In [34]:
df_train_ans = pd.DataFrame({'Real Class': train_label})
df_train_ans['Prediction'] = prediction_train

In [35]:
df_train_ans[ df_train_ans['Real Class'] != df_train_ans['Prediction'] ]


Out[35]:
Real Class Prediction
0 1.0 0
565 1.0 0
3025 1.0 0
3228 1.0 0
4178 1.0 0
4640 1.0 0
5100 1.0 0
5756 1.0 0
6054 1.0 0
6368 1.0 0
7011 1.0 0
7846 1.0 0
8090 1.0 0
8379 1.0 0
8456 1.0 0
9203 1.0 0
10279 1.0 0
10634 1.0 0
11066 1.0 0
11245 1.0 0
13509 1.0 0
16473 1.0 0
16564 1.0 0
16592 1.0 0
16839 1.0 0
17345 1.0 0
17388 1.0 0
18130 1.0 0
18544 1.0 0
18785 1.0 0
... ... ...
211413 1.0 0
212044 1.0 0
212428 1.0 0
212566 1.0 0
213192 1.0 0
213857 1.0 0
214939 1.0 0
215590 1.0 0
217172 1.0 0
217302 1.0 0
218002 1.0 0
218111 1.0 0
219398 1.0 0
219675 1.0 0
220316 1.0 0
220494 1.0 0
220676 1.0 0
220908 1.0 0
221153 1.0 0
221768 1.0 0
222246 1.0 0
223899 1.0 0
224143 1.0 0
224355 1.0 0
224761 1.0 0
224810 1.0 0
224957 1.0 0
226113 1.0 0
227489 1.0 0
227556 1.0 0

387 rows × 2 columns


In [36]:
df_train_ans['Prediction'].value_counts()


Out[36]:
0    227845
Name: Prediction, dtype: int64

In [37]:
df_train_ans['Real Class'].value_counts()


Out[37]:
0.0    227458
1.0       387
Name: Real Class, dtype: int64

Confusion Matrix


In [38]:
import seaborn as sns
%matplotlib inline

cols = ['Real_Class_1','Real_Class_0']  #Gold standard
rows = ['Prediction_1','Prediction_0'] #diagnostic tool (our prediction)

B1P1 = len(df_ans[(df_ans['Prediction'] == df_ans['Real Class']) & (df_ans['Real Class'] == 1)])
B1P0 = len(df_ans[(df_ans['Prediction'] != df_ans['Real Class']) & (df_ans['Real Class'] == 1)])
B0P1 = len(df_ans[(df_ans['Prediction'] != df_ans['Real Class']) & (df_ans['Real Class'] == 0)])
B0P0 = len(df_ans[(df_ans['Prediction'] == df_ans['Real Class']) & (df_ans['Real Class'] == 0)])

conf = np.array([[B1P1,B0P1],[B1P0,B0P0]])
df_cm = pd.DataFrame(conf, columns = [i for i in cols], index = [i for i in rows])

f, ax= plt.subplots(figsize = (5, 5))
sns.heatmap(df_cm, annot=True, ax=ax) 
ax.xaxis.set_ticks_position('top')  # putting the x-axis labels on top matches the usual textbook layout

print('total test case number: ', np.sum(conf))


total test case number:  56962
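
As a cross-check on the hand-built matrix, scikit-learn can compute the same counts plus per-class precision and recall directly from df_ans. A minimal sketch, assuming sklearn.metrics is available (it ships with scikit-learn); note that scikit-learn puts true classes on the rows and predictions on the columns, i.e. the transpose of the layout used above.

from sklearn.metrics import confusion_matrix, classification_report

# labels=[1, 0] lists the positive (fraud) class first
print(confusion_matrix(df_ans['Real Class'], df_ans['Prediction'], labels=[1, 0]))
print(classification_report(df_ans['Real Class'], df_ans['Prediction'], digits=4))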

In [39]:
def model_efficacy(conf):
    total_num = np.sum(conf)
    sen = conf[0][0]/(conf[0][0]+conf[1][0])                  # sensitivity = TP / (TP + FN)
    spe = conf[1][1]/(conf[1][1]+conf[0][1])                  # specificity = TN / (TN + FP)
    false_positive_rate = conf[0][1]/(conf[0][1]+conf[1][1])  # FP / (FP + TN)
    false_negative_rate = conf[1][0]/(conf[0][0]+conf[1][0])  # FN / (TP + FN)
    
    print('total_num: ',total_num)
    print('G1P1: ',conf[0][0]) #G = gold standard; P = prediction
    print('G0P1: ',conf[0][1])
    print('G1P0: ',conf[1][0])
    print('G0P0: ',conf[1][1])
    print('##########################')
    print('sensitivity: ',sen)
    print('specificity: ',spe)
    print('false_positive_rate: ',false_positive_rate)
    print('false_negative_rate: ',false_negative_rate)
    
    return total_num, sen, spe, false_positive_rate, false_negative_rate

model_efficacy(conf)


total_num:  56962
G1P1:  0
G0P1:  0
G1P0:  105
G0P0:  56857
##########################
sensitivity:  0.0
specificity:  1.0
false_positive_rate:  0.0
false_negative_rate:  1.0
Out[39]:
(56962, 0.0, 1.0, 0.0, 1.0)

In practice this run is a failure: the model predicts class 0 for every single sample, so the 99.8% accuracy merely reflects the class imbalance and not one fraudulent transaction is caught.
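
One common remedy for this kind of imbalance is to weight the rare class much more heavily during training, so that missing a fraud case costs far more than misclassifying a normal one. A minimal sketch, reusing the same model and data as above; the weight of 580 is only a rough inverse-frequency guess (about 492 frauds in 284807 rows), not a tuned value.

# Penalize errors on class 1 (fraud) roughly in proportion to its rarity
class_weight = {0: 1.0, 1: 580.0}

train_history = model.fit(x=train_feature_trans, y=train_label,
                          validation_split=0.5, epochs=20,
                          batch_size=50000, verbose=2,
                          class_weight=class_weight)

Resampling the training data or lowering the 0.5 decision threshold are other common options.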


In [ ]: